In this part, we display the presented results of Table 3 and Table 4:

PRED-LD

# Imports
library(readxl)
library(readr)
library(dplyr)



# ADHD dataset
ADHD <- read.delim(
  "imp_real_values_harm_new_input/ADHD_chrom1_real_values_new_input.txt", 
  sep = "\t", 
  
  stringsAsFactors = FALSE 
)

# CAD dataset
CAD <- read.delim(
  "imp_real_values_harm_new_input/CAD_EUR_chr1_3925_real_values_new_input.txt", 
  sep = "\t", 
  
  stringsAsFactors = FALSE 
)

# Colorectal Cancer dataset
Colorectal_Cancer <- read.delim(
  "imp_real_values_harm_new_input/Colorectal_Cancer_4097_EAS_chrom1_real_values_new_input.txt", 
  sep = "\t", 
  
  stringsAsFactors = FALSE 
)

# Double Eyelid dataset
Double_Eyelid <- read.delim(
  "imp_real_values_harm_new_input/Double_eylid_4021_EAS_chorm1_real_values_new_input.txt", 
  sep = "\t", 
  
  stringsAsFactors = FALSE 
)

# Epilepsy dataset
Epilepsy <- read.delim(
  "imp_real_values_harm_new_input/Epilepsy_chrom1_real_values_new_input.txt", 
  sep = "\t", 
  
  stringsAsFactors = FALSE 
)

# GFR_AFR dataset
GFR_AFR <- read.delim(
  "imp_real_values_harm_new_input/GFR_AFR_4054_chrom1_real_values_new_input.txt", 
  sep = "\t", 
  
  stringsAsFactors = FALSE )

# GFR_EUR dataset
GFR_EUR <- read.delim(
  "imp_real_values_harm_new_input/GFR_EUR_4053_chrom1_real_values_new_input.txt", 
  sep = "\t", 
  
  stringsAsFactors = FALSE )

# UACR dataset
UACR <- read.delim(
  "imp_real_values_harm_new_input/UACR_chrom1_real_values_new_input.txt", 
  sep = "\t", 
  
  stringsAsFactors = FALSE )


analyze_imputation <- function(true_file, imputed_file, trait_name, method_name, delim = "\t") {
  
  get_complement <- function(allele) {
    case_when(
      allele == "A" ~ "T",
      allele == "T" ~ "A",
      allele == "C" ~ "G",
      allele == "G" ~ "C",
      
    )
  }
  
  # Read the "true" data
  true_data <- read_delim(true_file, delim = delim, escape_double = FALSE, trim_ws = TRUE)
  #true_data$z <- true_data$beta/true_data$SE
 
  # Read the "imputed" data
  imputed_data <- read_delim(imputed_file, delim = delim, escape_double = FALSE, trim_ws = TRUE)
 
  imputed_data<- na.omit(imputed_data)
    
  # Filter for imputed rows
  imputed_data <- imputed_data[imputed_data$imputed == 1, ]
  imputed_data <- imputed_data[imputed_data$R2 >= 0.5, ]
  
  
  # Calculate number of imputed SNPs
  imputed_snps <- nrow(imputed_data)
  
  # Join data on "snp"
  joined_data <- inner_join(true_data, imputed_data, by = "snp", suffix = c("_true", "_imp"))
 
  # Add complement alleles to the dataset
  joined_data <- joined_data %>%
    mutate(
      REF_complement = get_complement(A2_imp),
      ALT_complement = get_complement(A1_imp)
    )
  # Modify z values
  joined_data <-joined_data %>%
    mutate(
      z_true = case_when(
        A1_imp == A1_true & A2_imp == A2_true ~ z_true,
        A1_imp == A2_true & A2_imp == A1_true ~ -z_true,
        ALT_complement == A1_true & REF_complement == A2_true~ z_true,
        ALT_complement == A2_true & REF_complement == A1_true ~ -z_true,
        TRUE ~ NA_real_
      )
    )
  
  
  
  # Calculate coverage percentage
  coverage <- (nrow(joined_data) / nrow(true_data)) * 100
  
  # Linear model for Z-scores
  z_model <- lm(joined_data$z_imp ~ joined_data$z_true)
  r2_z <- summary(z_model)$r.squared
 
  
  # Create the scatter plot
  plot(
    joined_data$z_true, joined_data$z_imp,
    main = paste0(trait_name, " - ", method_name, " (R² = ", round(r2_z, 4), ")"),
    xlab = "Actual z values",
    ylab = "Imputed z values",
    pch = 20,
    col = 'black'
  )
  grid()
  abline(a = 0, b = 1, col = 'red', lwd = 2)
  
  # Calculate -log10(p) values
  joined_data <- joined_data %>%
    mutate(log10p_true = -log10(2 * (pnorm(-abs(z_true)))),
           log10p_imp = -log10(2 * (pnorm(-abs(z_imp)))))
  
  # Linear model for -log10(p)
  log10p_model <- lm(joined_data$log10p_imp ~ joined_data$log10p_true)
  r2_log10p <- summary(log10p_model)$r.squared
  
  
  # Create the scatter plot
  plot(
    joined_data$log10p_true, joined_data$log10p_imp,
    main = paste0(trait_name, " - ", method_name, " (R² = ", round(r2_log10p, 4), ")"),
    xlab = "Actual -log10(p) values",
    ylab = "Imputed -log10(p) values",
    pch = 20,
    col = 'black'
  )
  grid()
  abline(a = 0, b = 1, col = 'red', lwd = 2)
  
  
  # Return results as a dataframe row
  return(data.frame(
    Trait = trait_name,
    Method = method_name,
    Imputed_SNPs = imputed_snps,
    R2_z = r2_z,
    R2_log10p = r2_log10p,
    Masked_SNPs_Found = nrow(joined_data),

    Imputation_Percentage = coverage
  ))
}

# Define datasets as a list of traits and corresponding file paths
datasets <- list(
  list(
    true_file = "imp_real_values_harm_new_input/ADHD_chrom1_real_values_new_input.txt",
    imputed_file = "NEW_RESULTS_HARM_FINAL/PRED_LD_imputation_results_ADHD_chr1_TOP_LD_harm_NEW_AF2.txt",
    trait_name = "ADHD",
    method_name = "TOP_LD"
  ),
  list(
    true_file = "imp_real_values_harm_new_input/CAD_EUR_chr1_3925_real_values_new_input.txt",
    imputed_file = "NEW_RESULTS_HARM_FINAL/PRED_LD_imputation_results_CAD_chr1_TOP_LD_harm_NEW_AF2.txt",
    trait_name = "CAD",
    method_name = "TOP_LD"
  ),
  list(
    true_file = "imp_real_values_harm_new_input/Colorectal_Cancer_4097_EAS_chrom1_real_values_new_input.txt",
    imputed_file = "NEW_RESULTS_HARM_FINAL/PRED_LD_imputation_results_COLCANCER_chr1_TOP_LD_harm_NEW_AF2.txt",
    trait_name = "Colorectal_Cancer",
    method_name = "TOP_LD"
  ),
  list(
    true_file = "imp_real_values_harm_new_input/Double_eylid_4021_EAS_chorm1_real_values_new_input.txt",
    imputed_file = "NEW_RESULTS_HARM_FINAL/PRED_LD_imputation_results_DLID_chr1_TOP_LD_harm_NEW_AF2.txt",
    trait_name = "Double_Eyelid",
    method_name = "TOP_LD"
  ),
  list(
    true_file = "imp_real_values_harm_new_input/Epilepsy_chrom1_real_values_new_input.txt",
    imputed_file = "NEW_RESULTS_HARM_FINAL/PRED_LD_imputation_results_EP_chr1_TOP_LD_harm_NEW_AF2.txt",
    trait_name = "Epilepsy",
    method_name = "TOP_LD"
  ),
  list(
    true_file = "imp_real_values_harm_new_input/GFR_AFR_4054_chrom1_real_values_new_input.txt",
    imputed_file = "NEW_RESULTS_HARM_FINAL/PRED_LD_imputation_results_GFRAFR_chr1_TOP_LD_harm_NEW_AF2.txt",
    trait_name = "GFR_AFR",
    method_name = "TOP_LD"
  ),
  list(
    true_file = "imp_real_values_harm_new_input/GFR_EUR_4053_chrom1_real_values_new_input.txt",
    imputed_file = "NEW_RESULTS_HARM_FINAL/PRED_LD_imputation_results_GFR_chr1_TOP_LD_harm_NEW_AF2.txt",
    trait_name = "GFR_EUR",
    method_name = "TOP_LD"
  ),
  list(
    true_file = "imp_real_values_harm_new_input/UACR_chrom1_real_values_new_input.txt",
    imputed_file = "NEW_RESULTS_HARM_FINAL/PRED_LD_imputation_results_UACR_chr1_TOP_LD_harm_NEW_AF2.txt",
    trait_name = "UACR",
    method_name = "TOP_LD"
  )
)


# Analyze all datasets and collect results
results <- do.call(rbind, lapply(datasets, function(dataset) {
  analyze_imputation(
    true_file = dataset$true_file,
    imputed_file = dataset$imputed_file,
    trait_name = dataset$trait_name,
    method_name = dataset$method_name
  )
}))

# Define the desired order of the Trait column
desired_order <- c("ADHD", "UACR", "GFR_EUR", "GFR_AFR", "Epilepsy", 
                   "Colorectal_Cancer", "Double_Eyelid", "CAD")

# Reorder the dataframe
results <- results %>%
  mutate(Trait = factor(Trait, levels = desired_order)) %>%
  arrange(Trait)

# Display the sorted dataframe
print(results)
##               Trait Method Imputed_SNPs      R2_z R2_log10p Masked_SNPs_Found
## 1              ADHD TOP_LD       243062 0.7192522 0.5450747              1484
## 2              UACR TOP_LD        64371 0.8663770 0.7488910               112
## 3           GFR_EUR TOP_LD        57200 0.6747569 0.6258257               114
## 4           GFR_AFR TOP_LD        41834 0.8513797 0.5925031                83
## 5          Epilepsy TOP_LD       440870 0.9244751 0.8603947             67727
## 6 Colorectal_Cancer TOP_LD       378395 0.8235658 0.7720981             10807
## 7     Double_Eyelid TOP_LD       274356 0.6284068 0.4278044              3697
## 8               CAD TOP_LD       682449 0.9250690 0.8941464            108464
##   Imputation_Percentage
## 1             39.145344
## 2              6.157229
## 3              8.976378
## 4              7.155172
## 5             91.822015
## 6             62.685615
## 7             47.276215
## 8             77.873666
print(mean(results$Imputed_SNPs))
## [1] 272817.1
print(mean(results$R2_z))
## [1] 0.8016603
print(mean(results$R2_log10p))
## [1] 0.6833423
print(mean(results$Imputation_Percentage))
## [1] 42.63645
# Define datasets as a list of traits and corresponding file paths
datasets_PhenoScanner <- list(
  list(
    true_file = "imp_real_values_harm_new_input/ADHD_chrom1_real_values_new_input.txt",
    imputed_file = "NEW_RESULTS_HARM_FINAL/PRED_LD_imputation_results_ADHD_chr1_Pheno_Scanner_harm.txt",
    trait_name = "ADHD",
    method_name = "Pheno_Scanner"
  ),
  list(
    true_file = "imp_real_values_harm_new_input/CAD_EUR_chr1_3925_real_values_new_input.txt",
    imputed_file = "NEW_RESULTS_HARM_FINAL/PRED_LD_imputation_results_CAD_chr1_Pheno_Scanner_harm.txt",
    trait_name = "CAD",
    method_name = "Pheno_Scanner"
  ),
  list(
    true_file = "imp_real_values_harm_new_input/Colorectal_Cancer_4097_EAS_chrom1_real_values_new_input.txt",
    imputed_file = "NEW_RESULTS_HARM_FINAL/PRED_LD_imputation_results_COLCANCER_chr1_Pheno_Scanner_harm.txt",
    trait_name = "Colorectal_Cancer",
    method_name = "Pheno_Scanner"
  ),
  list(
    true_file = "imp_real_values_harm_new_input/Double_eylid_4021_EAS_chorm1_real_values_new_input.txt",
    imputed_file = "NEW_RESULTS_HARM_FINAL/PRED_LD_imputation_results_DLID_chr1_Pheno_Scanner_harm.txt",
    trait_name = "Double_Eyelid",
    method_name = "Pheno_Scanner"
  ),
  list(
    true_file = "imp_real_values_harm_new_input/Epilepsy_chrom1_real_values_new_input.txt",
    imputed_file = "NEW_RESULTS_HARM_FINAL/PRED_LD_imputation_results_EP_chr1_Pheno_Scanner_harm.txt",
    trait_name = "Epilepsy",
    method_name = "Pheno_Scanner"
  ),
  list(
    true_file = "imp_real_values_harm_new_input/GFR_AFR_4054_chrom1_real_values_new_input.txt",
    imputed_file = "NEW_RESULTS_HARM_FINAL/PRED_LD_imputation_results_GFRAFR_chr1_Pheno_Scanner_harm.txt",
    trait_name = "GFR_AFR",
    method_name = "Pheno_Scanner"
  ),
  list(
    true_file = "imp_real_values_harm_new_input/GFR_EUR_4053_chrom1_real_values_new_input.txt",
    imputed_file = "NEW_RESULTS_HARM_FINAL/PRED_LD_imputation_results_GFR_chr1_Pheno_Scanner_harm.txt",
    trait_name = "GFR_EUR",
    method_name = "Pheno_Scanner"
  ),
  list(
    true_file = "imp_real_values_harm_new_input/UACR_chrom1_real_values_new_input.txt",
    imputed_file = "NEW_RESULTS_HARM_FINAL/PRED_LD_imputation_results_UACR_chr1_Pheno_Scanner_harm.txt",
    trait_name = "UACR",
    method_name = "Pheno_Scanner"
  )
)



# Analyze all datasets and collect results
results_Pheno_Scanner <- do.call(rbind, lapply(datasets_PhenoScanner, function(dataset) {
  analyze_imputation(
    true_file = dataset$true_file,
    imputed_file = dataset$imputed_file,
    trait_name = dataset$trait_name,
    method_name = dataset$method_name
  )
}))

# Define the desired order of the Trait column
desired_order <- c("ADHD", "UACR", "GFR_EUR", "GFR_AFR", "Epilepsy", 
                   "Colorectal_Cancer", "Double_Eyelid", "CAD")

# Reorder the dataframe
results_Pheno_Scanner <- results_Pheno_Scanner %>%
  mutate(Trait = factor(Trait, levels = desired_order)) %>%
  arrange(Trait)

 
print(results_Pheno_Scanner)
##               Trait        Method Imputed_SNPs      R2_z R2_log10p
## 1              ADHD Pheno_Scanner       277745 0.9138511 0.8592616
## 2              UACR Pheno_Scanner        53201 0.9418549 0.9453670
## 3           GFR_EUR Pheno_Scanner        52243 0.8571522 0.7898745
## 4           GFR_AFR Pheno_Scanner        19155 0.8761950 0.7567467
## 5          Epilepsy Pheno_Scanner       460371 0.9426859 0.8945793
## 6 Colorectal_Cancer Pheno_Scanner       339280 0.9172028 0.8765409
## 7     Double_Eyelid Pheno_Scanner       281966 0.8032283 0.6611157
## 8               CAD Pheno_Scanner       589504 0.9600094 0.9387779
##   Masked_SNPs_Found Imputation_Percentage
## 1               877             23.133738
## 2               112              6.157229
## 3               115              9.055118
## 4                40              3.448276
## 5             70121             95.067721
## 6             10560             61.252900
## 7              3270             41.815857
## 8            108162             77.656840
print(mean(results_Pheno_Scanner$Imputed_SNPs))
## [1] 259183.1
print(mean(results_Pheno_Scanner$R2_z))
## [1] 0.9015224
print(mean(results_Pheno_Scanner$R2_log10p))
## [1] 0.8402829
print(mean(results_Pheno_Scanner$Imputation_Percentage))
## [1] 39.69846
# Define datasets as a list of traits and corresponding file paths
datasets_HapMap <- list(
  list(
    true_file = "imp_real_values_harm_new_input/ADHD_chrom1_real_values_new_input.txt",
    imputed_file = "NEW_RESULTS_HARM_FINAL/PRED_LD_imputation_results_ADHD_chr1_CEU_HapMap_harm.txt",
    trait_name = "ADHD",
    method_name = "HapMap"
  ),
  
  
  list(
    true_file = "imp_real_values_harm_new_input/ADHD_chrom1_real_values_new_input.txt",
    imputed_file = "NEW_RESULTS_HARM_FINAL/PRED_LD_imputation_results_ADHD_chr1_TSI_HapMap_harm.txt",
    trait_name = "ADHD",
    method_name = "HapMap"
  ),
  
  list(
    true_file = "imp_real_values_harm_new_input/CAD_EUR_chr1_3925_real_values_new_input.txt",
    imputed_file = "NEW_RESULTS_HARM_FINAL/PRED_LD_imputation_results_CAD_chr1_TSI_HapMap_harm.txt",
    trait_name = "CAD",
    method_name = "HapMap"
  ),
  
  list(
    true_file = "imp_real_values_harm_new_input/CAD_EUR_chr1_3925_real_values_new_input.txt",
    imputed_file = "NEW_RESULTS_HARM_FINAL/PRED_LD_imputation_results_CAD_chr1_CEU_HapMap_harm.txt",
    trait_name = "CAD",
    method_name = "HapMap"
  ),
  
  
  list(
    true_file = "imp_real_values_harm_new_input/Colorectal_Cancer_4097_EAS_chrom1_real_values_new_input.txt",
    imputed_file = "NEW_RESULTS_HARM_FINAL/PRED_LD_imputation_results_COLCANCER_chr1_CHB_HapMap_harm.txt",
    trait_name = "Colorectal_Cancer",
    method_name = "HapMap"
  ),
  
  list(
    true_file = "imp_real_values_harm_new_input/Colorectal_Cancer_4097_EAS_chrom1_real_values_new_input.txt",
    imputed_file = "NEW_RESULTS_HARM_FINAL/PRED_LD_imputation_results_COLCANCER_chr1_JPT_HapMap_harm.txt",
    trait_name = "Colorectal_Cancer",
    method_name = "HapMap"
  ),
  
  
  list(
    true_file = "imp_real_values_harm_new_input/Colorectal_Cancer_4097_EAS_chrom1_real_values_new_input.txt",
    imputed_file = "NEW_RESULTS_HARM_FINAL/PRED_LD_imputation_results_COLCANCER_chr1_CHD_HapMap_harm.txt",
    trait_name = "Colorectal_Cancer",
    method_name = "HapMap"
  ),
  
  
  

  
  list(
    true_file = "imp_real_values_harm_new_input/Double_eylid_4021_EAS_chorm1_real_values_new_input.txt",
    imputed_file = "NEW_RESULTS_HARM_FINAL/PRED_LD_imputation_results_DLID_chr1_CHB_HapMap_harm.txt",
    trait_name = "Double_Eyelid",
    method_name = "HapMap"
  ),
  
  
  list(
    true_file = "imp_real_values_harm_new_input/Double_eylid_4021_EAS_chorm1_real_values_new_input.txt",
    imputed_file = "NEW_RESULTS_HARM_FINAL/PRED_LD_imputation_results_DLID_chr1_JPT_HapMap_harm.txt",
    trait_name = "Double_Eyelid",
    method_name = "HapMap"
  ),
  
  list(
    true_file = "imp_real_values_harm_new_input/Double_eylid_4021_EAS_chorm1_real_values_new_input.txt",
    imputed_file = "NEW_RESULTS_HARM_FINAL/PRED_LD_imputation_results_DLID_chr1_CHD_HapMap_harm.txt",
    trait_name = "Double_Eyelid",
    method_name = "HapMap"
  ),
  
  
  
  
  list(
    true_file = "imp_real_values_harm_new_input/Epilepsy_chrom1_real_values_new_input.txt",
    imputed_file = "NEW_RESULTS_HARM_FINAL/PRED_LD_imputation_results_EP_chr1_CEU_HapMap_harm.txt",
    trait_name = "Epilepsy",
    method_name = "HapMap"
  ),
  
  list(
    true_file = "imp_real_values_harm_new_input/Epilepsy_chrom1_real_values_new_input.txt",
    imputed_file = "NEW_RESULTS_HARM_FINAL/PRED_LD_imputation_results_EP_chr1_TSI_HapMap_harm.txt",
    trait_name = "Epilepsy",
    method_name = "HapMap"
  ),
  
  
  
  list(
    true_file = "imp_real_values_harm_new_input/GFR_AFR_4054_chrom1_real_values_new_input.txt",
    imputed_file = "NEW_RESULTS_HARM_FINAL/PRED_LD_imputation_results_GFRAFR_chr1_ASW_HapMap_harm.txt",
    trait_name = "GFR_AFR",
    method_name = "HapMap"
  ),
  
  
  list(
    true_file = "imp_real_values_harm_new_input/GFR_AFR_4054_chrom1_real_values_new_input.txt",
    imputed_file = "NEW_RESULTS_HARM_FINAL/PRED_LD_imputation_results_GFRAFR_chr1_LWK_HapMap_harm.txt",
    trait_name = "GFR_AFR",
    method_name = "HapMap"
  ),
  
  
  list(
    true_file = "imp_real_values_harm_new_input/GFR_AFR_4054_chrom1_real_values_new_input.txt",
    imputed_file = "NEW_RESULTS_HARM_FINAL/PRED_LD_imputation_results_GFRAFR_chr1_MKK_HapMap_harm.txt",
    trait_name = "GFR_AFR",
    method_name = "HapMap"
  ),
  
  
  list(
    true_file = "imp_real_values_harm_new_input/GFR_AFR_4054_chrom1_real_values_new_input.txt",
    imputed_file = "NEW_RESULTS_HARM_FINAL/PRED_LD_imputation_results_GFRAFR_chr1_YRI_HapMap_harm.txt",
    trait_name = "GFR_AFR",
    method_name = "HapMap"
  ),
  
  
  
  
  
  list(
    true_file = "imp_real_values_harm_new_input/GFR_EUR_4053_chrom1_real_values_new_input.txt",
    imputed_file = "NEW_RESULTS_HARM_FINAL/PRED_LD_imputation_results_GFR_chr1_CEU_HapMap_harm.txt",
    trait_name = "GFR_EUR",
    method_name = "HapMap"
  ),
  list(
    true_file = "imp_real_values_harm_new_input/GFR_EUR_4053_chrom1_real_values_new_input.txt",
    imputed_file = "NEW_RESULTS_HARM_FINAL/PRED_LD_imputation_results_GFR_chr1_TSI_HapMap_harm.txt",
    trait_name = "GFR_EUR",
    method_name = "HapMap"
  ),
  
  
  list(
    true_file = "imp_real_values_harm_new_input/UACR_chrom1_real_values_new_input.txt",
    imputed_file = "NEW_RESULTS_HARM_FINAL/PRED_LD_imputation_results_UACR_chr1_CEU_HapMap_harm.txt",
    trait_name = "UACR",
    method_name = "HapMap"
  ),
  
  list(
    true_file = "imp_real_values_harm_new_input/UACR_chrom1_real_values_new_input.txt",
    imputed_file = "NEW_RESULTS_HARM_FINAL/PRED_LD_imputation_results_UACR_chr1_TSI_HapMap_harm.txt",
    trait_name = "UACR",
    method_name = "HapMap"
  )
)

results_HapMap <- do.call(rbind, lapply(datasets_HapMap, function(dataset) {
  analyze_imputation(
    true_file = dataset$true_file,
    imputed_file = dataset$imputed_file,
    trait_name = dataset$trait_name,
    method_name = dataset$method_name
  )
}))

# Define the desired order of the Trait column
desired_order <- c("ADHD", "UACR", "GFR_EUR", "GFR_AFR", "Epilepsy", 
                   "Colorectal_Cancer", "Double_Eyelid", "CAD")


# Reorder the dataframe
results_HapMap <- results_HapMap %>%
  mutate(Trait = factor(Trait, levels = desired_order)) %>%
  arrange(Trait)

 
print(results_HapMap)
##                Trait Method Imputed_SNPs      R2_z R2_log10p Masked_SNPs_Found
## 1               ADHD HapMap        44510 0.5689947 0.4720841              1146
## 2               ADHD HapMap        43810 0.6621255 0.5092777              1123
## 3               UACR HapMap         7161 0.8597085 0.8183449                47
## 4               UACR HapMap         7061 0.5458129 0.8169917                50
## 5            GFR_EUR HapMap         7200 0.5308537 0.4901625                50
## 6            GFR_EUR HapMap         7076 0.7255240 0.6059185                51
## 7            GFR_AFR HapMap         3799 0.5860847 0.5279502                25
## 8            GFR_AFR HapMap         3289 0.7020338 0.4887175                21
## 9            GFR_AFR HapMap         3694 0.6424497 0.4635576                23
## 10           GFR_AFR HapMap         3400 0.8409470 0.5305692                20
## 11          Epilepsy HapMap        73686 0.7457929 0.6730226             13636
## 12          Epilepsy HapMap        73101 0.7953478 0.7006532             13509
## 13 Colorectal_Cancer HapMap        40336 0.7603346 0.7322579              1646
## 14 Colorectal_Cancer HapMap        40609 0.7723229 0.7611019              1627
## 15 Colorectal_Cancer HapMap        39193 0.8204848 0.7908324              1553
## 16     Double_Eyelid HapMap        42239 0.5985798 0.5462494              1509
## 17     Double_Eyelid HapMap        42829 0.6249283 0.5433542              1580
## 18     Double_Eyelid HapMap        41244 0.7038363 0.5510504              1461
## 19               CAD HapMap        82190 0.8365034 0.8066661             16342
## 20               CAD HapMap        82278 0.7953648 0.7997694             16379
##    Imputation_Percentage
## 1              30.229491
## 2              29.622791
## 3               2.583837
## 4               2.748763
## 5               3.937008
## 6               4.015748
## 7               2.155172
## 8               1.810345
## 9               1.982759
## 10              1.724138
## 11             18.487235
## 12             18.315053
## 13              9.547564
## 14              9.437355
## 15              9.008121
## 16             19.296675
## 17             20.204604
## 18             18.682864
## 19             11.733031
## 20             11.759596
print(mean(results_HapMap$Imputed_SNPs))
## [1] 34435.25
print(mean(results_HapMap$R2_z))
## [1] 0.7059015
print(mean(results_HapMap$R2_log10p))
## [1] 0.6314266
print(mean(results_HapMap$Imputation_Percentage))
## [1] 11.36411
# Function to calculate and analyze imputation results
analyze_imputation <- function(true_file, imputed_file, trait_name, method_name, delim = "\t") {
  
  get_complement <- function(allele) {
    case_when(
      allele == "A" ~ "T",
      allele == "T" ~ "A",
      allele == "C" ~ "G",
      allele == "G" ~ "C",
      
    )
  }
  
  # Read the "true" data
  true_data <- read_delim(true_file, delim = delim, escape_double = FALSE, trim_ws = TRUE)
  #true_data$z <- true_data$beta/true_data$SE
  
  # Read the "imputed" data
  imputed_data <- read_delim(imputed_file, delim = delim, escape_double = FALSE, trim_ws = TRUE)
  
  imputed_data<- na.omit(imputed_data)
  
  # Filter for imputed rows
  imputed_data <- imputed_data[imputed_data$imputed == 1, ]
  imputed_data <- imputed_data[imputed_data$R2 >= 0.5, ]
  
  
  # Calculate number of imputed SNPs
  imputed_snps <- nrow(imputed_data)
  
  # Join data on "snp"
  joined_data <- inner_join(true_data, imputed_data, by = "snp", suffix = c("_true", "_imp"))
  
  # Add complement alleles to the dataset
  joined_data <- joined_data %>%
    mutate(
      REF_complement = get_complement(A2_imp),
      ALT_complement = get_complement(A1_imp)
    )
  # Modify z values
  joined_data <-joined_data %>%
    mutate(
      z_true = case_when(
        A1_imp == A1_true & A2_imp == A2_true ~ z_true,
        A1_imp == A2_true & A2_imp == A1_true ~ -z_true,
        ALT_complement == A1_true & REF_complement == A2_true~ z_true,
        ALT_complement == A2_true & REF_complement == A1_true ~ -z_true,
        TRUE ~ NA_real_
      )
    )
  
  
  
  # Calculate coverage percentage
  coverage <- (nrow(joined_data) / nrow(true_data)) * 100
  
  # Linear model for Z-scores
  z_model <- lm(joined_data$z_imp ~ joined_data$z_true)
  r2_z <- summary(z_model)$r.squared
  
  
  # Create the scatter plot
  plot(
    joined_data$z_true, joined_data$z_imp,
    main = paste0(trait_name, " - ", method_name, " (R² = ", round(r2_z, 4), ")"),
    xlab = "Actual z values",
    ylab = "Imputed z values",
    pch = 20,
    col = 'black'
  )
  grid()
  abline(a = 0, b = 1, col = 'red', lwd = 2)

  # Calculate -log10(p) values
  joined_data <- joined_data %>%
    mutate(log10p_true = -log10(2 * (pnorm(-abs(z_true)))),
           log10p_imp = -log10(2 * (pnorm(-abs(z_imp)))))

  # Linear model for -log10(p)
  log10p_model <- lm(joined_data$log10p_imp ~ joined_data$log10p_true)
  r2_log10p <- summary(log10p_model)$r.squared


  # Create the scatter plot
  plot(
    joined_data$log10p_true, joined_data$log10p_imp,
    main = paste0(trait_name, " - ", method_name, " (R² = ", round(r2_log10p, 4), ")"),
    xlab = "Actual -log10(p) values",
    ylab = "Imputed -log10(p) values",
    pch = 20,
    col = 'black'
  )
  grid()
  abline(a = 0, b = 1, col = 'red', lwd = 2)
  #
  
  # Return results as a dataframe row
  return(data.frame(
    Trait = trait_name,
    Method = method_name,
    Imputed_SNPs = imputed_snps,
    R2_z = r2_z,
    R2_log10p = r2_log10p,
    Masked_SNPs_Found = nrow(joined_data),

    Imputation_Percentage = coverage
  ))
}





# Define datasets as a list of traits and corresponding file paths
datasets_COMBINED <- list(
  list(
    true_file = "imp_real_values_harm_new_input/ADHD_chrom1_real_values_new_input.txt",
    imputed_file = "NEW_RESULTS_HARM_FINAL/PRED_LD_imputation_results_ADHD_chr1_COMBINED_PANELS_harm_NEW_AF2.txt",
    trait_name = "ADHD",
    method_name = "PRED_LD"
  ),
  list(
    true_file = "imp_real_values_harm_new_input/CAD_EUR_chr1_3925_real_values_new_input.txt",
    imputed_file = "NEW_RESULTS_HARM_FINAL/PRED_LD_imputation_results_CAD_chr1_COMBINED_PANELS_harm_NEW_AF2.txt",
    trait_name = "CAD",
    method_name = "PRED_LD"
  ),
  list(
    true_file = "imp_real_values_harm_new_input/Colorectal_Cancer_4097_EAS_chrom1_real_values_new_input.txt",
    imputed_file = "NEW_RESULTS_HARM_FINAL/PRED_LD_imputation_results_COLCANCER_chr1_COMBINED_PANELS_harm_NEW_AF2.txt",
    trait_name = "Colorectal_Cancer",
    method_name = "PRED_LD"
  ),
  list(
    true_file = "imp_real_values_harm_new_input/Double_eylid_4021_EAS_chorm1_real_values_new_input.txt",
    imputed_file = "NEW_RESULTS_HARM_FINAL/PRED_LD_imputation_results_DLID_chr1_COMBINED_PANELS_harm_NEW_AF2.txt",
    trait_name = "Double_Eyelid",
    method_name = "PRED_LD"
  ),
  list(
    true_file = "imp_real_values_harm_new_input/Epilepsy_chrom1_real_values_new_input.txt",
    imputed_file = "NEW_RESULTS_HARM_FINAL/PRED_LD_imputation_results_EP_chr1_COMBINED_PANELS_harm_NEW_AF2.txt",
    trait_name = "Epilepsy",
    method_name = "PRED_LD"
  ),
  list(
    true_file = "imp_real_values_harm_new_input/GFR_AFR_4054_chrom1_real_values_new_input.txt",
    imputed_file = "NEW_RESULTS_HARM_FINAL/PRED_LD_imputation_results_GFRAFR_chr1_COMBINED_PANELS_harm_NEW_AF2.txt",
    trait_name = "GFR_AFR",
    method_name = "PRED_LD"
  ),
  list(
    true_file = "imp_real_values_harm_new_input/GFR_EUR_4053_chrom1_real_values_new_input.txt",
    imputed_file = "NEW_RESULTS_HARM_FINAL/PRED_LD_imputation_results_GFR_chr1_COMBINED_PANELS_harm_NEW_AF2.txt",
    trait_name = "GFR_EUR",
    method_name = "PRED_LD"
  ),
  list(
    true_file = "imp_real_values_harm_new_input/UACR_chrom1_real_values_new_input.txt",
    imputed_file = "NEW_RESULTS_HARM_FINAL/PRED_LD_imputation_results_UACR_chr1_COMBINED_PANELS_harm_NEW_AF2.txt",
    trait_name = "UACR",
    method_name = "PRED_LD"
  )
)


results_COMBINED <- do.call(rbind, lapply(datasets_COMBINED, function(dataset) {
  analyze_imputation(
    true_file = dataset$true_file,
    imputed_file = dataset$imputed_file,
    trait_name = dataset$trait_name,
    method_name = dataset$method_name
  )
}))

desired_order <- c("ADHD", "UACR", "GFR_EUR", "GFR_AFR", "Epilepsy", 
                   "Colorectal_Cancer", "Double_Eyelid", "CAD")

# Reorder the dataframe
results_COMBINED <- results_COMBINED %>%
  mutate(Trait = factor(Trait, levels = desired_order)) %>%
  arrange(Trait)

 
print(results_COMBINED)
##               Trait  Method Imputed_SNPs      R2_z R2_log10p Masked_SNPs_Found
## 1              ADHD PRED_LD       345047 0.7010635 0.5836026              1894
## 2              UACR PRED_LD        91989 0.8859721 0.8748930               166
## 3           GFR_EUR PRED_LD        84689 0.7237233 0.6838377               165
## 4           GFR_AFR PRED_LD        51633 0.8381368 0.5858471                98
## 5          Epilepsy PRED_LD       516837 0.9271306 0.8730452             72589
## 6 Colorectal_Cancer PRED_LD       473097 0.8620196 0.8155308             12658
## 7     Double_Eyelid PRED_LD       367707 0.6648054 0.4990525              4709
## 8               CAD PRED_LD       779429 0.9351433 0.9085859            120683
##   Imputation_Percentage
## 1             49.960433
## 2              9.125893
## 3             12.992126
## 4              8.448276
## 5             98.413753
## 6             73.422274
## 7             60.217391
## 8             86.646516
print(mean(results_COMBINED$Imputed_SNPs))
## [1] 338803.5
print(mean(results_COMBINED$R2_z))
## [1] 0.8172493
print(mean(results_COMBINED$R2_log10p))
## [1] 0.7280493
print(mean(results_COMBINED$Imputation_Percentage))
## [1] 49.90333

RAISS

#Read all RAISS results
RAISS_ADHD <- read_delim("raiss_results/z_ADHD_chr1.txt", delim = "\t", escape_double = FALSE, trim_ws = TRUE)
RAISS_CAD <- read_delim("raiss_results/z_CAD_chr1.txt", delim = "\t", escape_double = FALSE, trim_ws = TRUE)
RAISS_C_CANCER <- read_delim("raiss_results/z_CCANCER_chr1.txt", delim = "\t", escape_double = FALSE, trim_ws = TRUE)
RAISS_D_LID <- read_delim("raiss_results/z_DLID_chr1.txt", delim = "\t", escape_double = FALSE, trim_ws = TRUE)
RAISS_EP <-  read_delim("raiss_results/z_EP_chr1.txt", delim = "\t", escape_double = FALSE, trim_ws = TRUE)
RAISS_GFR_AFR <- read_delim("raiss_results/z_GFRAFR_chr1.txt", delim = "\t", escape_double = FALSE, trim_ws = TRUE)
RAISS_GFR_EUR <- read_delim("raiss_results/z_GFREUR_chr1.txt", delim = "\t", escape_double = FALSE, trim_ws = TRUE)
RAISS_UACR <-  read_delim("raiss_results/z_UACR_chr1.txt", delim = "\t", escape_double = FALSE, trim_ws = TRUE)


#FILTER RAISS

#Read all RAISS results
RAISS_ADHD <-  RAISS_ADHD[RAISS_ADHD$imputed == 'TRUE',]
RAISS_CAD <-  RAISS_CAD[RAISS_CAD$imputed == 'TRUE',]
RAISS_C_CANCER <- RAISS_C_CANCER[RAISS_C_CANCER$imputed == 'TRUE',]
RAISS_D_LID <-  RAISS_D_LID[RAISS_D_LID$imputed == 'TRUE',]
RAISS_EP <-  RAISS_EP[RAISS_EP$imputed == 'TRUE',]
RAISS_GFR_AFR <- RAISS_GFR_AFR[RAISS_GFR_AFR$imputed == 'TRUE',] 
RAISS_GFR_EUR <- RAISS_GFR_EUR[RAISS_GFR_EUR$imputed == 'TRUE',]
RAISS_UACR <-   RAISS_UACR[RAISS_UACR$imputed == 'TRUE',]







# Rename RAISS datasets
colnames(RAISS_ADHD)[colnames(RAISS_ADHD) == "A0"] <- "A2"
colnames(RAISS_ADHD)[colnames(RAISS_ADHD) == "Z"] <- "z"
colnames(RAISS_ADHD)[colnames(RAISS_ADHD) == "rsID"] <- "snp"

colnames(RAISS_CAD)[colnames(RAISS_CAD) == "A0"] <- "A2"
colnames(RAISS_CAD)[colnames(RAISS_CAD) == "Z"] <- "z"
colnames(RAISS_CAD)[colnames(RAISS_CAD) == "rsID"] <- "snp"

colnames(RAISS_C_CANCER)[colnames(RAISS_C_CANCER) == "A0"] <- "A2"
colnames(RAISS_C_CANCER)[colnames(RAISS_C_CANCER) == "Z"] <- "z"
colnames(RAISS_C_CANCER)[colnames(RAISS_C_CANCER) == "rsID"] <- "snp"

colnames(RAISS_D_LID)[colnames(RAISS_D_LID) == "A0"] <- "A2"
colnames(RAISS_D_LID)[colnames(RAISS_D_LID) == "Z"] <- "z"
colnames(RAISS_D_LID)[colnames(RAISS_D_LID) == "rsID"] <- "snp"

colnames(RAISS_EP)[colnames(RAISS_EP) == "A0"] <- "A2"
colnames(RAISS_EP)[colnames(RAISS_EP) == "Z"] <- "z"
colnames(RAISS_EP)[colnames(RAISS_EP) == "rsID"] <- "snp"

colnames(RAISS_GFR_AFR)[colnames(RAISS_GFR_AFR) == "A0"] <- "A2"
colnames(RAISS_GFR_AFR)[colnames(RAISS_GFR_AFR) == "Z"] <- "z"
colnames(RAISS_GFR_AFR)[colnames(RAISS_GFR_AFR) == "rsID"] <- "snp"

colnames(RAISS_GFR_EUR)[colnames(RAISS_GFR_EUR) == "A0"] <- "A2"
colnames(RAISS_GFR_EUR)[colnames(RAISS_GFR_EUR) == "Z"] <- "z"
colnames(RAISS_GFR_EUR)[colnames(RAISS_GFR_EUR) == "rsID"] <- "snp"

colnames(RAISS_UACR)[colnames(RAISS_UACR) == "A0"] <- "A2"
colnames(RAISS_UACR)[colnames(RAISS_UACR) == "Z"] <- "z"
colnames(RAISS_UACR)[colnames(RAISS_UACR) == "rsID"] <- "snp"



# Function to calculate and analyze imputation results
analyze_imputation <- function(true_data, imputed_data, trait_name, method_name, delim = "\t") {

get_complement <- function(allele) {
  case_when(
    allele == "A" ~ "T",
    allele == "T" ~ "A",
    allele == "C" ~ "G",
    allele == "G" ~ "C",
    
  )
}
 


# Calculate number of imputed SNPs
imputed_snps <- nrow(imputed_data)

# Join data on "snp"
joined_data <- inner_join(true_data, imputed_data, by = "snp", suffix = c("_true", "_imp"))


# Add complement alleles to the dataset
joined_data <- joined_data %>%
  mutate(
    REF_complement = get_complement(A2_imp),
    ALT_complement = get_complement(A1_imp)
  )
# Modify z values
joined_data <-joined_data %>%
  mutate(
    z_true = case_when(
      A1_imp == A1_true & A2_imp == A2_true ~ z_true,
      A1_imp == A2_true & A2_imp == A1_true ~ -z_true,
      ALT_complement == A1_true & REF_complement == A2_true~ z_true,
      ALT_complement == A2_true & REF_complement == A1_true ~ -z_true,
      TRUE ~ NA_real_
    )
  )



# Calculate coverage percentage
coverage <- (nrow(joined_data) / nrow(true_data)) * 100

# Linear model for Z-scores
z_model <- lm(joined_data$z_imp ~ joined_data$z_true)
r2_z <- summary(z_model)$r.squared

# Create the scatter plot
plot(
  joined_data$z_true, joined_data$z_imp,
  main = paste0(trait_name, " - ", method_name, " (R² = ", round(r2_z, 4), ")"),
  xlab = "Actual z values",
  ylab = "Imputed z values",
  pch = 20,
  col = 'black'
)
grid()
abline(a = 0, b = 1, col = 'red', lwd = 2)

 
 # Calculate -log10(p) values
  joined_data <- joined_data %>%
    mutate(
      log10p_true = -log10( 2*(pnorm(-abs(z_imp)))),
      log10p_imp = -log10( 2*(pnorm(-abs(z_true)))) 
    )
  
  # Linear model for -log10(p)
  log10p_model <- lm(joined_data$log10p_imp ~ joined_data$log10p_true)
  r2_log10p <- summary(log10p_model)$r.squared
  
  
  # Create the scatter plot
  plot(
    joined_data$log10p_true, joined_data$log10p_imp,
    main = paste0(trait_name, " - ", method_name, " (R² = ", round(r2_log10p, 4), ")"),
    xlab = "Actual -log10(p) values",
    ylab = "Imputed -log10(p) values",
    pch = 20,
    col = 'black'
  )
  grid()
  abline(a = 0, b = 1, col = 'red', lwd = 2)
  
  
  
  # Return results as a dataframe row
  return(data.frame(
    Trait = trait_name,
    Method = method_name,
    Imputed_SNPs = imputed_snps,
    R2_z =  r2_z,
    R2_log10p=r2_log10p,
    Masked_SNPs_Found = nrow(joined_data),
    Imputation_Percentage = coverage
  ))



  }



# List of datasets, imputed files, and their corresponding traits and methods
datasets <- list(
 
  list("ADHD", ADHD, RAISS_ADHD, "RAISS"),
  
 
  list("CAD", CAD, RAISS_CAD, "RAISS"),
 
  list("Colorectal Cancer", Colorectal_Cancer, RAISS_C_CANCER, "RAISS"),
 
  list("Double Eyelid", Double_Eyelid, RAISS_D_LID, "RAISS"),
  
 
  list("Epilepsy", Epilepsy, RAISS_EP, "RAISS"),
  
 
  list("GFR_AFR", GFR_AFR, RAISS_GFR_AFR, "RAISS"),
 
  list("GFR_EUR", GFR_EUR, RAISS_GFR_EUR, "RAISS"),
 
  list("UACR", UACR, RAISS_UACR, "RAISS")
)

 
# Initialize an empty dataframe to store the results
results_df <- data.frame()

# Loop through datasets and run the analyze_imputation function
for (dataset in datasets) {
  trait <- dataset[[1]]
  true_data <- dataset[[2]]
  imputed_data <- dataset[[3]]
  method <- dataset[[4]]
  
  # Analyze the imputation
  result <- analyze_imputation(true_data, imputed_data, trait, method)
  
  # Convert the result to a dataframe and append it to results_df
  results_df <- rbind(results_df, as.data.frame(result))
  
 
}

# Define the desired order of the Trait column
desired_order <- c("ADHD", "UACR", "GFR_EUR", "GFR_AFR", "Epilepsy", 
                   "Colorectal Cancer", "Double Eyelid", "CAD")

# Reorder the dataframe
results_df <- results_df %>%
  mutate(Trait = factor(Trait, levels = desired_order)) %>%
  arrange(Trait)

# Display the sorted dataframe
print(results_df)
##               Trait Method Imputed_SNPs      R2_z R2_log10p Masked_SNPs_Found
## 1              ADHD  RAISS       213003 0.5761315 0.3660817              1504
## 2              UACR  RAISS        30052 0.8368564 0.8532327                80
## 3           GFR_EUR  RAISS        24284 0.8537633 0.8260136                69
## 4           GFR_AFR  RAISS         6465 0.4055610 0.1216244                18
## 5          Epilepsy  RAISS       148826 0.8722352 0.7374435             54835
## 6 Colorectal Cancer  RAISS       240299 0.8775192 0.7701230              8839
## 7     Double Eyelid  RAISS       238279 0.7059988 0.4709145              4418
## 8               CAD  RAISS       139507 0.8928083 0.8123519             74897
##   Imputation_Percentage
## 1             39.672910
## 2              4.398021
## 3              5.433071
## 4              1.551724
## 5             74.343470
## 6             51.270302
## 7             56.496164
## 8             53.773639
print(mean(results_df$Imputed_SNPs))
## [1] 130089.4
print(mean(results_df$R2_z))
## [1] 0.7526092
print(mean(results_df$R2_log10p))
## [1] 0.6197231
print(mean(results_df$Imputation_Percentage))
## [1] 35.86741

SSIMP and DIST

#Read all SSIMP results
SSIMP_ADHD <-  read_delim("SSIMP/SSIMP_results/SSIMP_ADHD_EUR_chr1.txt", delim = "\t", escape_double = FALSE, trim_ws = TRUE)
SSIMP_CAD <-read_delim("SSIMP/SSIMP_results/SSIMP_CAD_EUR_chr1.txt", delim = "\t", escape_double = FALSE, trim_ws = TRUE) 
SSIMP_C_CANCER <-  read_delim("SSIMP/SSIMP_results/SSIMP_COL_CANCER_EAS_chr1.txt", delim = "\t", escape_double = FALSE, trim_ws = TRUE)
SSIMP_D_LID <- read_delim("SSIMP/SSIMP_results/SSIMP_DLID_EAS_chr1.txt", delim = "\t", escape_double = FALSE, trim_ws = TRUE)
SSIMP_EP <-  read_delim("SSIMP/SSIMP_results/SSIMP_EPILEPSY_EUR_chr1.txt", delim = "\t", escape_double = FALSE, trim_ws = TRUE)
SSIMP_GFR_AFR <-  read_delim("SSIMP/SSIMP_results/SSIMP_GFR_AFR_chr1.txt", delim = "\t", escape_double = FALSE, trim_ws = TRUE)
SSIMP_GFR_EUR <-  read_delim("SSIMP/SSIMP_results/SSIMP_GFR_EUR_chr1.txt", delim = "\t", escape_double = FALSE, trim_ws = TRUE)
SSIMP_UACR <-  read_delim("SSIMP/SSIMP_results/SSIMP_UACR_EUR_chr1.txt", delim = "\t", escape_double = FALSE, trim_ws = TRUE)

#Read all DIST results
DIST_ADHD <-read_delim("DIST/DIST_results/DIST_ADHD_EUR_chr1.txt", delim = " ", escape_double = FALSE, trim_ws = TRUE)
DIST_CAD <-read_delim("DIST/DIST_results/DIST_CAD_EUR_chr1.txt", delim = " ", escape_double = FALSE, trim_ws = TRUE)
DIST_C_CANCER <-read_delim("DIST/DIST_results/DIST_COL_CANCER_EAS_chr1.txt", delim = " ", escape_double = FALSE, trim_ws = TRUE)
DIST_D_LID <-read_delim("DIST/DIST_results/DIST_D_LID_EAS_chr1.txt", delim = " ", escape_double = FALSE, trim_ws = TRUE)
DIST_EP <-read_delim("DIST/DIST_results/DIST_EPILPESY_EUR_chr1.txt", delim = " ", escape_double = FALSE, trim_ws = TRUE)
DIST_GFR_AFR <-read_delim("DIST/DIST_results/DIST_GFR_AFR_chr1.txt", delim = " ", escape_double = FALSE, trim_ws = TRUE)
DIST_GFR_EUR <-read_delim("DIST/DIST_results/DIST_GFR_EUR_chr1.txt", delim = " ", escape_double = FALSE, trim_ws = TRUE)
DIST_UACR <-read_delim("DIST/DIST_results/DIST_UACR_chr1.txt", delim = " ", escape_double = FALSE, trim_ws = TRUE)







#FILTER SSIMP
SSIMP_ADHD <- SSIMP_ADHD[SSIMP_ADHD$source == 'SSIMP',]
SSIMP_CAD <- SSIMP_CAD[SSIMP_CAD$source == 'SSIMP',]
SSIMP_C_CANCER <- SSIMP_C_CANCER[SSIMP_C_CANCER$source == 'SSIMP',]
SSIMP_D_LID <- SSIMP_D_LID[SSIMP_D_LID$source == 'SSIMP',]
SSIMP_EP <- SSIMP_EP[SSIMP_EP$source == 'SSIMP',]
SSIMP_GFR_AFR <- SSIMP_GFR_AFR[SSIMP_GFR_AFR$source == 'SSIMP',]
SSIMP_GFR_EUR <- SSIMP_GFR_EUR[SSIMP_GFR_EUR$source == 'SSIMP',]
SSIMP_UACR <- SSIMP_UACR[SSIMP_UACR$source == 'SSIMP',]






#FILTER DIST
DIST_ADHD <- DIST_ADHD[DIST_ADHD$type == 0,]
DIST_CAD <- DIST_CAD[DIST_CAD$type == 0,]
DIST_C_CANCER <- DIST_C_CANCER[DIST_C_CANCER$type == 0,]
DIST_D_LID <- DIST_D_LID[DIST_D_LID$type == 0,]
DIST_EP <- DIST_EP[DIST_EP$type == 0,]
DIST_GFR_AFR <- DIST_GFR_AFR[DIST_GFR_AFR$type == 0,]
DIST_GFR_EUR <- DIST_GFR_EUR[DIST_GFR_EUR$type == 0,]
DIST_UACR <- DIST_UACR[DIST_UACR$type == 0,]



# Rename SSIMP datasets
colnames(SSIMP_ADHD)[colnames(SSIMP_ADHD) == "Allele1"] <- "A2"
colnames(SSIMP_ADHD)[colnames(SSIMP_ADHD) == "Allele2"] <- "A1"
colnames(SSIMP_ADHD)[colnames(SSIMP_ADHD) == "z_imp"] <- "z"
colnames(SSIMP_ADHD)[colnames(SSIMP_ADHD) == "SNP"] <- "snp"

colnames(SSIMP_CAD)[colnames(SSIMP_CAD) == "Allele1"] <- "A2"
colnames(SSIMP_CAD)[colnames(SSIMP_CAD) == "Allele2"] <- "A1"
colnames(SSIMP_CAD)[colnames(SSIMP_CAD) == "z_imp"] <- "z"
colnames(SSIMP_CAD)[colnames(SSIMP_CAD) == "SNP"] <- "snp"

colnames(SSIMP_C_CANCER)[colnames(SSIMP_C_CANCER) == "Allele1"] <- "A2"
colnames(SSIMP_C_CANCER)[colnames(SSIMP_C_CANCER) == "Allele2"] <- "A1"
colnames(SSIMP_C_CANCER)[colnames(SSIMP_C_CANCER) == "z_imp"] <- "z"
colnames(SSIMP_C_CANCER)[colnames(SSIMP_C_CANCER) == "SNP"] <- "snp"

colnames(SSIMP_D_LID)[colnames(SSIMP_D_LID) == "Allele1"] <- "A2"
colnames(SSIMP_D_LID)[colnames(SSIMP_D_LID) == "Allele2"] <- "A1"
colnames(SSIMP_D_LID)[colnames(SSIMP_D_LID) == "z_imp"] <- "z"
colnames(SSIMP_D_LID)[colnames(SSIMP_D_LID) == "SNP"] <- "snp"

colnames(SSIMP_EP)[colnames(SSIMP_EP) == "Allele1"] <- "A2"
colnames(SSIMP_EP)[colnames(SSIMP_EP) == "Allele2"] <- "A1"
colnames(SSIMP_EP)[colnames(SSIMP_EP) == "z_imp"] <- "z"
colnames(SSIMP_EP)[colnames(SSIMP_EP) == "SNP"] <- "snp"

colnames(SSIMP_GFR_AFR)[colnames(SSIMP_GFR_AFR) == "Allele1"] <- "A2"
colnames(SSIMP_GFR_AFR)[colnames(SSIMP_GFR_AFR) == "Allele2"] <- "A1"
colnames(SSIMP_GFR_AFR)[colnames(SSIMP_GFR_AFR) == "z_imp"] <- "z"
colnames(SSIMP_GFR_AFR)[colnames(SSIMP_GFR_AFR) == "SNP"] <- "snp"

colnames(SSIMP_GFR_EUR)[colnames(SSIMP_GFR_EUR) == "Allele1"] <- "A2"
colnames(SSIMP_GFR_EUR)[colnames(SSIMP_GFR_EUR) == "Allele2"] <- "A1"
colnames(SSIMP_GFR_EUR)[colnames(SSIMP_GFR_EUR) == "z_imp"] <- "z"
colnames(SSIMP_GFR_EUR)[colnames(SSIMP_GFR_EUR) == "SNP"] <- "snp"

colnames(SSIMP_UACR)[colnames(SSIMP_UACR) == "Allele1"] <- "A2"
colnames(SSIMP_UACR)[colnames(SSIMP_UACR) == "Allele2"] <- "A1"
colnames(SSIMP_UACR)[colnames(SSIMP_UACR) == "z_imp"] <- "z"
colnames(SSIMP_UACR)[colnames(SSIMP_UACR) == "SNP"] <- "snp"




# Rename DIST datasets
colnames(DIST_ADHD)[colnames(DIST_ADHD) == "a1"] <- "A2"
colnames(DIST_ADHD)[colnames(DIST_ADHD) == "a2"] <- "A1"
colnames(DIST_ADHD)[colnames(DIST_ADHD) == "rsid"] <- "snp"

colnames(DIST_CAD)[colnames(DIST_CAD) == "a1"] <- "A2"
colnames(DIST_CAD)[colnames(DIST_CAD) == "a2"] <- "A1"
colnames(DIST_CAD)[colnames(DIST_CAD) == "rsid"] <- "snp"

colnames(DIST_C_CANCER)[colnames(DIST_C_CANCER) == "a1"] <- "A2"
colnames(DIST_C_CANCER)[colnames(DIST_C_CANCER) == "a2"] <- "A1"
colnames(DIST_C_CANCER)[colnames(DIST_C_CANCER) == "rsid"] <- "snp"

colnames(DIST_D_LID)[colnames(DIST_D_LID) == "a1"] <- "A2"
colnames(DIST_D_LID)[colnames(DIST_D_LID) == "a2"] <- "A1"
colnames(DIST_D_LID)[colnames(DIST_D_LID) == "rsid"] <- "snp"

colnames(DIST_EP)[colnames(DIST_EP) == "a1"] <- "A2"
colnames(DIST_EP)[colnames(DIST_EP) == "a2"] <- "A1"
colnames(DIST_EP)[colnames(DIST_EP) == "rsid"] <- "snp"

colnames(DIST_GFR_AFR)[colnames(DIST_GFR_AFR) == "a1"] <- "A2"
colnames(DIST_GFR_AFR)[colnames(DIST_GFR_AFR) == "a2"] <- "A1"
colnames(DIST_GFR_AFR)[colnames(DIST_GFR_AFR) == "rsid"] <- "snp"

colnames(DIST_GFR_EUR)[colnames(DIST_GFR_EUR) == "a1"] <- "A2"
colnames(DIST_GFR_EUR)[colnames(DIST_GFR_EUR) == "a2"] <- "A1"
colnames(DIST_GFR_EUR)[colnames(DIST_GFR_EUR) == "rsid"] <- "snp"

colnames(DIST_UACR)[colnames(DIST_UACR) == "a1"] <- "A2"
colnames(DIST_UACR)[colnames(DIST_UACR) == "a2"] <- "A1"
colnames(DIST_UACR)[colnames(DIST_UACR) == "rsid"] <- "snp"


# Function to calculate and analyze imputation results
analyze_imputation <- function(true_file, imputed_file, trait_name, method_name, delim = "\t") {
  
  get_complement <- function(allele) {
    case_when(
      allele == "A" ~ "T",
      allele == "T" ~ "A",
      allele == "C" ~ "G",
      allele == "G" ~ "C",
      
    )
  }
  
 
  # Calculate number of imputed SNPs
  imputed_snps <- nrow(imputed_data)
  
  # Join data on "snp"
  joined_data <- inner_join(true_data, imputed_data, by = "snp", suffix = c("_true", "_imp"))
  
  # Add complement alleles to the dataset
  joined_data <- joined_data %>%
    mutate(
      REF_complement = get_complement(A2_imp),
      ALT_complement = get_complement(A1_imp)
    )
  # Modify z values
  joined_data <-joined_data %>%
    mutate(
      z_true = case_when(
        A1_imp == A1_true & A2_imp == A2_true ~ -z_true,
        A1_imp == A2_true & A2_imp == A1_true ~ z_true,
        ALT_complement == A1_true & REF_complement == A2_true~ -z_true,
        ALT_complement == A2_true & REF_complement == A1_true ~ z_true,
        TRUE ~ NA_real_
      )
    )
  
  
  
  # Calculate coverage percentage
  coverage <- (nrow(joined_data) / nrow(true_data)) * 100
  
  # Linear model for Z-scores
  z_model <- lm(joined_data$z_imp ~ joined_data$z_true)
  r2_z <- summary(z_model)$r.squared
  
  
  # Create the scatter plot
  plot(
    joined_data$z_true, joined_data$z_imp,
    main = paste0(trait_name, " - ", method_name, " (R² = ", round(r2_z, 4), ")"),
    xlab = "Actual z values",
    ylab = "Imputed z values",
    pch = 20,
    col = 'black'
  )
  grid()
  abline(a = 0, b = 1, col = 'red', lwd = 2)
  
  
  
  
  
   # Calculate -log10(p) values
  joined_data <- joined_data %>%
    mutate(
      log10p_true = -log10( 2*(pnorm(-abs(z_imp)))),
      log10p_imp = -log10( 2*(pnorm(-abs(z_true)))) 
    )
  
  # Linear model for -log10(p)
  log10p_model <- lm(joined_data$log10p_imp ~ joined_data$log10p_true)
  r2_log10p <- summary(log10p_model)$r.squared
  
  
  # Create the scatter plot
  plot(
    joined_data$log10p_true, joined_data$log10p_imp,
    main = paste0(trait_name, " - ", method_name, " (R² = ", round(r2_log10p, 4), ")"),
    xlab = "Actual -log10(p) values",
    ylab = "Imputed -log10(p) values",
    pch = 20,
    col = 'black'
  )
  grid()
  abline(a = 0, b = 1, col = 'red', lwd = 2)
  
    # Return results as a dataframe row
  return(data.frame(
    Trait = trait_name,
    Method = method_name,
    Imputed_SNPs = imputed_snps,
    R2_z =  r2_z,
    R2_log10p=r2_log10p,
    Masked_SNPs_Found = nrow(joined_data),
    Imputation_Percentage = coverage
  ))
}










# List of datasets, imputed files, and their corresponding traits and methods
datasets <- list(
  list("ADHD", ADHD, SSIMP_ADHD, "SSIMP"),
  list("ADHD", ADHD, DIST_ADHD, "DIST"), 
  
  list("CAD", CAD, SSIMP_CAD, "SSIMP"),
  list("CAD", CAD, DIST_CAD, "DIST"),
 
  list("Colorectal Cancer", Colorectal_Cancer, SSIMP_C_CANCER, "SSIMP"),
  list("Colorectal Cancer", Colorectal_Cancer, DIST_C_CANCER, "DIST"),
 
  
  list("Double Eyelid", Double_Eyelid, SSIMP_D_LID, "SSIMP"),
  list("Double Eyelid", Double_Eyelid, DIST_D_LID, "DIST"),
 
  
  list("Epilepsy", Epilepsy, SSIMP_EP, "SSIMP"),
  list("Epilepsy", Epilepsy, DIST_EP, "DIST"),
 
  
  list("GFR_AFR", GFR_AFR, SSIMP_GFR_AFR, "SSIMP"),
  list("GFR_AFR", GFR_AFR, DIST_GFR_AFR, "DIST"),
 
  
  list("GFR_EUR", GFR_EUR, SSIMP_GFR_EUR, "SSIMP"),
  list("GFR_EUR", GFR_EUR, DIST_GFR_EUR, "DIST"),
 
  
  list("UACR", UACR, SSIMP_UACR, "SSIMP"),
  list("UACR", UACR, DIST_UACR, "DIST")
 
)

 
# Initialize an empty dataframe to store the results
results_df <- data.frame()

# Loop through datasets and run the analyze_imputation function
for (dataset in datasets) {
  trait <- dataset[[1]]
  true_data <- dataset[[2]]
  imputed_data <- dataset[[3]]
  method <- dataset[[4]]
  
  # Analyze the imputation
  result <- analyze_imputation(true_data, imputed_data, trait, method)
  
  # Convert the result to a dataframe and append it to results_df
  results_df <- rbind(results_df, as.data.frame(result))

}

# Define the desired order of the Trait column
desired_order <- c("ADHD", "UACR", "GFR_EUR", "GFR_AFR", "Epilepsy", 
                   "Colorectal Cancer", "Double Eyelid", "CAD")

# Reorder the dataframe
results_df <- results_df %>%
  mutate(Trait = factor(Trait, levels = desired_order)) %>%
  arrange(Trait)

results_DIST <- results_df[results_df$Method=='DIST',]

 
print(results_DIST)
##                Trait Method Imputed_SNPs       R2_z  R2_log10p
## 2               ADHD   DIST       737618 0.55892841 0.40250186
## 4               UACR   DIST       742568 0.24503103 0.34866814
## 6            GFR_EUR   DIST       742593 0.18531265 0.17103536
## 8            GFR_AFR   DIST       743044 0.06865005 0.02243398
## 10          Epilepsy   DIST       451355 0.94391275 0.88891900
## 12 Colorectal Cancer   DIST       689032 0.75984161 0.50614354
## 14     Double Eyelid   DIST       716061 0.55202669 0.28167836
## 16               CAD   DIST       251662 0.90504829 0.87239415
##    Masked_SNPs_Found Imputation_Percentage
## 2               3784              99.81535
## 4                620              34.08466
## 6                639              50.31496
## 8                527              45.43103
## 10             71503              96.94139
## 12             14028              81.36891
## 14              7261              92.85166
## 16            112862              81.03129
print(mean(results_DIST$Imputed_SNPs))
## [1] 634241.6
print(mean(results_DIST$R2_z))
## [1] 0.5273439
print(mean(results_DIST$R2_log10p))
## [1] 0.4367218
print(mean(results_DIST$Imputation_Percentage))
## [1] 72.72991
results_SSIMP <- results_df[results_df$Method=='SSIMP',]

 
print(results_SSIMP)
##                Trait Method Imputed_SNPs      R2_z  R2_log10p Masked_SNPs_Found
## 1               ADHD  SSIMP      1875319 0.6149143 0.45256522              3763
## 3               UACR  SSIMP      1879310 0.1988346 0.19560801              1256
## 5            GFR_EUR  SSIMP      1879896 0.1453583 0.07090878              1111
## 7            GFR_AFR  SSIMP      3245785 0.2104478 0.07067263              1139
## 9           Epilepsy  SSIMP      1627415 0.9454356 0.89272926             73109
## 11 Colorectal Cancer  SSIMP      1783481 0.8508834 0.78770433             16512
## 13     Double Eyelid  SSIMP      1810525 0.8154542 0.68433350              7775
## 15               CAD  SSIMP      1450754 0.8677342 0.83552577            136174
##    Imputation_Percentage
## 1               99.26141
## 3               69.04893
## 5               87.48031
## 7               98.18966
## 9               99.11875
## 11              95.77726
## 13              99.42455
## 15              97.76856
print(mean(results_SSIMP$Imputed_SNPs))
## [1] 1944061
print(mean(results_SSIMP$R2_z))
## [1] 0.5811328
print(mean(results_SSIMP$R2_log10p))
## [1] 0.4987559
print(mean(results_SSIMP$Imputation_Percentage))
## [1] 93.25868

FAPI

# Imports
library(readxl)
library(readr)
library(dplyr)
 
# ADHD dataset
ADHD <- read.delim(
  "imp_real_values_harm_new_input/ADHD_chrom1_real_values_new_input.txt", 
  sep = "\t", 
  
  stringsAsFactors = FALSE 
)

# CAD dataset
CAD <- read.delim(
  "imp_real_values_harm_new_input/CAD_EUR_chr1_3925_real_values_new_input.txt", 
  sep = "\t", 
  
  stringsAsFactors = FALSE 
)

# Colorectal Cancer dataset
Colorectal_Cancer <- read.delim(
  "imp_real_values_harm_new_input/Colorectal_Cancer_4097_EAS_chrom1_real_values_new_input.txt", 
  sep = "\t", 
  
  stringsAsFactors = FALSE 
)

# Double Eyelid dataset
Double_Eyelid <- read.delim(
  "imp_real_values_harm_new_input/Double_eylid_4021_EAS_chorm1_real_values_new_input.txt", 
  sep = "\t", 
  
  stringsAsFactors = FALSE 
)

# Epilepsy dataset
Epilepsy <- read.delim(
  "imp_real_values_harm_new_input/Epilepsy_chrom1_real_values_new_input.txt", 
  sep = "\t", 
  
  stringsAsFactors = FALSE 
)

# GFR_AFR dataset
GFR_AFR <- read.delim(
  "imp_real_values_harm_new_input/GFR_AFR_4054_chrom1_real_values_new_input.txt", 
  sep = "\t", 
  
  stringsAsFactors = FALSE )

# GFR_EUR dataset
GFR_EUR <- read.delim(
  "imp_real_values_harm_new_input/GFR_EUR_4053_chrom1_real_values_new_input.txt", 
  sep = "\t", 
  
  stringsAsFactors = FALSE )

# UACR dataset
UACR <- read.delim(
  "imp_real_values_harm_new_input/UACR_chrom1_real_values_new_input.txt", 
  sep = "\t", 
  
  stringsAsFactors = FALSE )





#Read all FAPI results
FAPI_ADHD <- read_delim("FAPI/fapi_results/FAPI_ADHD_EUR_chr1.txt.impute.txt", delim = "\t", escape_double = FALSE, trim_ws = TRUE)
FAPI_CAD <- read_delim("FAPI/fapi_results/FAPI_CAD_EUR_chr1.txt.impute.txt", delim = "\t", escape_double = FALSE, trim_ws = TRUE)
FAPI_C_CANCER <- read_delim("FAPI/fapi_results/FAPI_COL_CANCER_EAS_chr1.txt.impute.txt", delim = "\t", escape_double = FALSE, trim_ws = TRUE)
FAPI_D_LID <- read_delim("FAPI/fapi_results/FAPI_DLID_EAS_chr1.txt.impute.txt", delim = "\t", escape_double = FALSE, trim_ws = TRUE)
FAPI_EP <-  read_delim("FAPI/fapi_results/FAPI_EPILEPSY_EUR_chr1.txt.impute.txt", delim = "\t", escape_double = FALSE, trim_ws = TRUE)
FAPI_GFR_AFR <- read_delim("FAPI/fapi_results/FAPI_GFR_AFR_chr1.txt.impute.txt", delim = "\t", escape_double = FALSE, trim_ws = TRUE)
FAPI_GFR_EUR <- read_delim("FAPI/fapi_results/FAPI_GFR_EUR_chr1.txt.impute.txt", delim = "\t", escape_double = FALSE, trim_ws = TRUE)
FAPI_UACR <-  read_delim("FAPI/fapi_results/FAPI_UACR_EUR_chr1.txt.impute.txt", delim = "\t", escape_double = FALSE, trim_ws = TRUE)





#FILTER FAPI

#Read all FAPI results
FAPI_ADHD <-  FAPI_ADHD[FAPI_ADHD$Type == 'I',]
FAPI_CAD <-  FAPI_CAD[FAPI_CAD$Type == 'I',]
FAPI_C_CANCER <- FAPI_C_CANCER[FAPI_C_CANCER$Type == 'I',]
FAPI_D_LID <-  FAPI_D_LID[FAPI_D_LID$Type == 'I',]
FAPI_EP <-  FAPI_EP[FAPI_EP$Type == 'I',]
FAPI_GFR_AFR <- FAPI_GFR_AFR[FAPI_GFR_AFR$Type == 'I',] 
FAPI_GFR_EUR <- FAPI_GFR_EUR[FAPI_GFR_EUR$Type == 'I',]
FAPI_UACR <-   FAPI_UACR[FAPI_UACR$Type == 'I',]




colnames(FAPI_ADHD)[colnames(FAPI_ADHD) == "ID"] <- "snp"
colnames(FAPI_CAD)[colnames(FAPI_CAD) == "ID"] <- "snp"
colnames(FAPI_C_CANCER)[colnames(FAPI_C_CANCER) == "ID"] <- "snp"
colnames(FAPI_D_LID)[colnames(FAPI_D_LID) == "ID"] <- "snp"
colnames(FAPI_EP)[colnames(FAPI_EP) == "ID"] <- "snp"
colnames(FAPI_GFR_AFR)[colnames(FAPI_GFR_AFR) == "ID"] <- "snp"
colnames(FAPI_GFR_EUR)[colnames(FAPI_GFR_EUR) == "ID"] <- "snp"
colnames(FAPI_UACR)[colnames(FAPI_UACR) == "ID"] <- "snp"





# Function to calculate and analyze imputation results
analyze_imputation <- function(true_data, imputed_data, trait_name, method_name, delim = "\t") {
  
  get_complement <- function(allele) {
    case_when(
      allele == "A" ~ "T",
      allele == "T" ~ "A",
      allele == "C" ~ "G",
      allele == "G" ~ "C",
      
    )
  }
   
  
  # Calculate number of imputed SNPs
  imputed_snps <- nrow(imputed_data)
  
  # Join data on "snp"
  joined_data <- inner_join(true_data, imputed_data, by = "snp",suffix= c("_true","_imp") )
   # Calculate coverage percentage
  coverage <- (nrow(joined_data) / nrow(true_data)) * 100
  
 
  
  # Calculate -log10(p) values
  joined_data <- joined_data %>%
    mutate(
      log10p_true = -log10( 2*(pnorm(-abs(z)))),
      log10p_imp = -log10(P)  
    )
  
  # Linear model for -log10(p)
  log10p_model <- lm(joined_data$log10p_imp ~ joined_data$log10p_true)
  r2_log10p <- summary(log10p_model)$r.squared
  
  
  # Create the scatter plot
  plot(
    joined_data$log10p_true, joined_data$log10p_imp,
    main = paste0(trait_name, " - ", method_name, " (R² = ", round(r2_log10p, 4), ")"),
    xlab = "Actual -log10(p) values",
    ylab = "Imputed -log10(p) values",
    pch = 20,
    col = 'black'
  )
  grid()
  abline(a = 0, b = 1, col = 'red', lwd = 2)
  
  
  
  # Return results as a dataframe row
  return(data.frame(
    Trait = trait_name,
    Method = method_name,
    Imputed_SNPs = imputed_snps,
 
    R2_log10p=r2_log10p,
    Masked_SNPs_Found = nrow(joined_data),
    Imputation_Percentage = coverage
  ))
}




# List of datasets, imputed files, and their corresponding traits and methods
datasets <- list(
  
  list("ADHD", ADHD, FAPI_ADHD, "FAPI"),
  
  
  list("CAD", CAD, FAPI_CAD, "FAPI"),
  
  list("Colorectal Cancer", Colorectal_Cancer, FAPI_C_CANCER, "FAPI"),
  
  list("Double Eyelid", Double_Eyelid, FAPI_D_LID, "FAPI"),
  
  
  list("Epilepsy", Epilepsy, FAPI_EP, "FAPI"),
  
  
  list("GFR_AFR", GFR_AFR, FAPI_GFR_AFR, "FAPI"),
  
  list("GFR_EUR", GFR_EUR, FAPI_GFR_EUR, "FAPI"),
  
  list("UACR", UACR, FAPI_UACR, "FAPI")
)


# Initialize an empty dataframe to store the results
results_df <- data.frame()

# Loop through datasets and run the analyze_imputation function
for (dataset in datasets) {
  trait <- dataset[[1]]
  true_data <- dataset[[2]]
  imputed_data <- dataset[[3]]
  method <- dataset[[4]]
  
  # Analyze the imputation
  result <- analyze_imputation(true_data, imputed_data, trait, method)
  
  # Convert the result to a dataframe and append it to results_df
  results_df <- rbind(results_df, as.data.frame(result))
  
  
}

desired_order <- c("ADHD", "UACR", "GFR_EUR", "GFR_AFR", "Epilepsy", 
                   "Colorectal Cancer", "Double Eyelid", "CAD")

# Reorder the dataframe
results_df <- results_df %>%
  mutate(Trait = factor(Trait, levels = desired_order)) %>%
  arrange(Trait)

 
print(results_df)
##               Trait Method Imputed_SNPs R2_log10p Masked_SNPs_Found
## 1              ADHD   FAPI       262431 0.7616602               949
## 2              UACR   FAPI        47068 0.9474013                90
## 3           GFR_EUR   FAPI        44498 0.8996893                84
## 4           GFR_AFR   FAPI        47232 0.5144196                83
## 5          Epilepsy   FAPI       182768 0.9227724             66251
## 6 Colorectal Cancer   FAPI       312408 0.9126823             10851
## 7     Double Eyelid   FAPI       279728 0.7602837              3437
## 8               CAD   FAPI       170791 0.2396059             98148
##   Imputation_Percentage
## 1             25.032973
## 2              4.947774
## 3              6.614173
## 4              7.155172
## 5             89.820903
## 6             62.940835
## 7             43.951407
## 8             70.467110
print(mean(results_df$Imputed_SNPs))
## [1] 168365.5
print(mean(results_df$R2_log10p))
## [1] 0.7448143
print(mean(results_df$Imputation_Percentage))
## [1] 38.86629